library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.7     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.0
✔ readr   2.1.2     ✔ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(janitor)

Attaching package: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(skimr)
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
library(tsibble)

Attaching package: ‘tsibble’

The following objects are masked from ‘package:base’:

    intersect, setdiff, union
library(lubridate)

Attaching package: ‘lubridate’

The following object is masked from ‘package:tsibble’:

    interval

The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union
beds <- read_csv("raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>% 
  clean_names()
Warning: One or more parsing issues, see `problems()` for details
Rows: 30458 Columns: 20
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, Specialty, SpecialtyQF, SpecialtyName, SpecialtyNameQF
dbl  (5): AllStaffedBeddays, TotalOccupiedBeddays, AverageAvailableStaffedBeds, AverageOccupiedBeds, PercentageOccupancy
lgl  (5): AllStaffedBeddaysQF, TotalOccupiedBeddaysQF, AverageAvailableStaffedBedsQF, AverageOccupiedBedsQF, PercentageOccupancyQF

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(beds)
skim(beds)
view(beds)
beds %>% 
  arrange(desc(all_staffed_beddays))
beds %>% 
  distinct(location)

beds %>% 
  distinct(hb)

beds %>% 
  distinct(all_staffed_beddays)

beds %>% 
  distinct(total_occupied_beddays)
beds %>% 
  ggplot(aes(x = total_occupied_beddays)) +
  geom_histogram(col = "white")+
  scale_x_log10()
beds %>% 
  ggplot(aes(x = average_occupied_beds)) +
  geom_histogram(col = "white")+
  scale_x_log10()
beds %>% 
  ggplot(aes(x = percentage_occupancy)) +
  geom_histogram(col = "white")
beds_select <- beds %>% 
  mutate(date = yq(quarter),
         year = year(date),
         month = month(date, label = TRUE, abbr = FALSE),
         season = case_when(
           str_detect(month, "January") ~ "Winter",
           str_detect(month, "April") ~ "Spring",
           str_detect(month, "July") ~ "Summer",
           str_detect(month, "October") ~ "Autumn"),
         season = factor(season, order = TRUE)) %>% 
  select(quarter, hb, location, specialty_name, all_staffed_beddays, total_occupied_beddays, average_available_staffed_beds, average_occupied_beds, percentage_occupancy, date, year, month, season)
beds_select %>% 
  filter(hb == "S92000003")
beds_select %>%
  filter(!is.na(percentage_occupancy),
         specialty_name == "All Acute") %>% 
  group_by(quarter) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(group = 1)

beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_col() 
beds_select %>%
  filter(!is.na(total_occupied_beddays)) %>% 
  group_by(quarter) %>% 
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>% 
  ggplot(aes(x = quarter,
             y = mean_occupied_beddays)) +
  geom_point() +
  geom_line(group = 1)
beds_select %>%
  filter(!is.na(all_staffed_beddays)) %>% 
  group_by(quarter) %>% 
  summarise(mean_staffed_beddays = mean(all_staffed_beddays)) %>% 
  ggplot(aes(x = quarter,
             y = mean_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
beds_select %>%
  filter(!is.na(average_available_staffed_beds)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_staffed_beddays = mean(average_available_staffed_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
beds_select %>%
  filter(!is.na(average_available_staffed_beds)) %>% 
  mutate(empty_beddays = all_staffed_beddays - total_occupied_beddays) %>% 
  group_by(quarter) %>%
  summarise(mean_empty = mean(empty_beddays)) %>% 
  ggplot(aes(x = quarter,
             y= mean_empty)) +
  geom_point() +
  geom_line(group = 1)
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter, hb) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(aes(group = hb, colour = hb))
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter, specialty_name, hb) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>%
  filter(mean_percentage_occupancy > 50) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name)) +
  theme(legend.position = "none") +
  facet_wrap(~ hb)
beds_select %>% 
  distinct(specialty_name)
beds_select %>% 
  filter(specialty_name %in% c("All Acute", "Accident & Emergency", "Intensive Care Medicine")) %>% 
  group_by(quarter, specialty_name) %>% 
  summarise(mean_pct = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_pct)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name))
beds_select %>% 
  filter(location == "S08000019",
         quarter == "2017Q1")
beds_select %>%
  filter(!is.na(percentage_occupancy),
         percentage_occupancy == 100) %>% 
  group_by(quarter, hb) %>% 
  summarise(mean_avg_occupied_beds = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_occupied_beds)) +
  geom_point() +
  geom_line(aes(group = hb, colour = hb))

`

beds_select %>%
  filter(!is.na(average_occupied_beds)) %>% 
  group_by(quarter) %>% 
  summarise(mean_average_occupied_beds = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_average_occupied_beds)) +
  geom_point() +
  geom_line(group = 1)
beds_select %>% 
  group_by(year) %>% 
  summarise(mean_year_bed = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = year,
             y = mean_year_bed)) +
  geom_line()
beds_select %>% 
  group_by(season) %>% 
  summarise(mean_year_bed = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = season,
             y = mean_year_bed)) +
  geom_col() + 
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
beds_select %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(season) %>% 
  summarise(mean_prct_beds = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = season,
             y = mean_prct_beds)) +
  geom_col() +
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
beds_select %>% 
  filter(!is.na(total_occupied_beddays)) %>% 
  group_by(season) %>% 
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>% 
  ggplot(aes(x = season,
             y = mean_occupied_beddays)) +
  geom_col() +
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
beds_select %>% 
  filter(quarter == "2016Q4")
beds_select %>% 
  ggplot(aes(x = season,
             y = percentage_occupancy)) +
  geom_col() +
  facet_wrap(~ specialty_name)
beds_select %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(specialty_name, month) %>% 
  summarise(mean_pct_speciality = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = month,
             y = mean_pct_speciality)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name))+
  theme(legend.position = "none")
beds_select %>%
  filter(percentage_occupancy == 100,
         year == 2017) %>% 
  group_by(season, hb) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(x = season,
             y = count)) +
  geom_col(aes(fill = hb), position = "dodge")
beds_select %>% 
  mutate(bins = 
    case_when(percentage_occupancy < 25 ~ "<25",
              percentage_occupancy < 50 ~ "25-50",
              percentage_occupancy < 75 ~ "50-75",
              percentage_occupancy > 75 ~ ">75"
    )
  ) %>% 
  filter(bins == "<25") %>% 
  group_by(season) %>% 
  summarise(count = n())
beds_select %>% 
  filter(specialty_name == "Accident & Emergency") %>%
  group_by(quarter) %>% 
  summarise(mean_pct = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_pct)) +
  geom_point() +
  geom_line(group = 1)

simd_treatment <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv") %>% clean_names()
Rows: 40821 Columns: 18
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType, AdmissionTypeQF, SIMDQF, AverageLengthOfEpisodeQF, AverageLengthOfStayQF
dbl  (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOfStay, AverageLengthOfStay

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
simd_treatment %>% 
  filter(hb == "S92000003")
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(group = 1)

simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, admission_type) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_stay),
         simd == 5) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_stay),
         simd == 1) %>% 
  mutate(simd = replace_na(simd, 0)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
Error in `mutate()`:
! Problem while computing `simd = replace_na(simd, 0)`.
Caused by error in `vec_assign()`:
! Can't convert `replace` <double> to match type of `data` <factor<cd34a>>.
Backtrace:
  1. ... %>% ggplot(aes(x = quarter, y = mean_avg_stay))
 11. tidyr:::replace_na.default(simd, 0)
 12. vctrs::vec_assign(data, missing, replace, x_arg = "data", value_arg = "replace")
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)

simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, admission_type) %>% 
  summarise(mean_avg_epidsode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_epidsode)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_epidsode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_epidsode)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(!is.na(average_length_of_episode),
         simd == 5) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)

simd_treatment %>% 
  filter(!is.na(average_length_of_episode),
         simd == 1) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)
simd_treatment %>% 
  filter(!is.na(episodes)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_epidsode = mean(episodes)) %>% 
  ggplot(aes(x = quarter,
             y = mean_epidsode)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

simd_treatment %>% 
  filter(is.na(simd))
simd_treatment %>% 
  filter(!is.na(stays)) %>% 
  group_by(quarter, simd) %>% 
  summarise(total_stays = sum(stays)) %>% 
  ggplot(aes(x = quarter,
             y = total_stays)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))

age_and_sex <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv") %>%  clean_names()
Rows: 129393 Columns: 18
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType, AdmissionTypeQF, Sex, Age, AverageLengthOfEpisodeQF, AverageLengthOfStayQF
dbl  (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOfStay, AverageLengthOfStay

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
age_and_sex %>% 
  filter(sex == "Female",
         age == "0-9 years")
age_and_sex %>%
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter) %>% 
  summarise(avg_length_of_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_stay)) +
  geom_point() +
  geom_line(group = 1)

age_and_sex %>% 
  group_by(quarter, age) %>% 
  summarise(length_of_stay = mean(length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = length_of_stay)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

age_and_sex %>%
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_stay)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

age_and_sex %>%
  filter(!is.na(average_length_of_episode),
         age == "10-19 years",
         sex == "Female") %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

age_and_sex %>%
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
age_and_sex %>%
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, sex) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = sex,
                colour = sex))
`summarise()` has grouped output by 'quarter'. You can override using the `.groups` argument.

speciality %>% 
  filter(hb == "S92000003")
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_episode = mean(length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_episode)) +
  geom_point() +
  geom_line(group = 1) 
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_episode = mean(length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_episode)) +
  geom_point() +
  geom_line(group = 1) 
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_spell = mean(length_of_spell)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_spell)) +
  geom_point() +
  geom_line(group = 1)

waiting_times <- read_csv("raw_data/non_covid_raw_data/monthly_ae_waitingtimes_202206.csv") %>% clean_names()
Rows: 15837 Columns: 25
── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Country, HBT, TreatmentLocation, DepartmentType, NumberOfAttendancesEpisodeQF, NumberMeetingTargetEpisodeQF, DischargeDestinationAdmissionToSameQF, DischargeDest...
dbl (12): Month, NumberOfAttendancesAggregate, NumberOfAttendancesEpisode, NumberMeetingTargetAggregate, NumberMeetingTargetEpisode, DischargeDestinationAdmissionToSame, D...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
waiting_times <- waiting_times %>% 
  mutate(date = ym(month),
         quarter = quarter(date))
waiting_times %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()
waiting_times %>% 
  filter(department_type == "Emergency Department") %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()
waiting_times %>% 
  filter(department_type == "Minor Injury Unit or Other") %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()

library(fable)
Loading required package: fabletools
beds_fore <- beds_select %>%
  mutate(quarter = yearquarter(date)) %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter) %>% 
  summarise(mean_pct = mean(percentage_occupancy))


beds_fore <- as_tsibble(beds_fore, index = quarter, validate = FALSE) %>% 
  select(quarter, mean_pct)

beds_fore <- beds_fore %>% 
  filter_index(~ "2019 Q4")

beds_fore %>% 
  autoplot(mean_pct)

fit <- beds_fore %>% 
  model(
    snaive = SNAIVE(mean_pct),
    mean_model = MEAN(mean_pct),
    arima = ARIMA(mean_pct)
  )
forecast_1 <- fit %>% 
  fabletools::forecast(h=12)

forecast_1
forecast_1 %>% 
  autoplot(beds_fore) +
  guides(colour = guide_legend(title = "Forecast"))

wait_fore <- waiting_times %>% 
  mutate(month = ym(month),
         month = yearmonth(month)) %>% 
  filter(!is.na(prop_residence)) %>% 
  group_by(month) %>% 
  summarise(mean_prop_residence = mean(prop_residence))

wait_fore <- as_tsibble(wait_fore, index = month, validate = FALSE)

wait_fore <- wait_fore %>% 
  filter_index(~ "2019-12-01")

wait_fore %>% 
  autoplot(mean_prop_residence)

fit_wait <- wait_fore %>% 
  model(
    snaive = SNAIVE(mean_prop_residence),
    mean_model = MEAN(mean_prop_residence),
    arima = ARIMA(mean_prop_residence)
  )
forecast_wait <- fit_wait %>% 
  fabletools::forecast(h=30)

forecast_wait
forecast_wait %>% 
  autoplot(wait_fore) +
  guides(colour = guide_legend(title = "Forecast"))

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(janitor)
library(skimr)
library(tsibble)
library(lubridate)
```


```{r}
beds <- read_csv("raw_data/non_covid_raw_data/beds_by_nhs_board_of_treatment_and_specialty.csv") %>% 
  clean_names()
```


```{r}
glimpse(beds)
```


```{r}
skim(beds)
```


```{r}
view(beds)
```


```{r}
beds %>% 
  arrange(desc(all_staffed_beddays))
```


```{r}
beds %>% 
  distinct(location)

beds %>% 
  distinct(hb)

beds %>% 
  distinct(all_staffed_beddays)

beds %>% 
  distinct(total_occupied_beddays)
```


```{r}
beds %>% 
  ggplot(aes(x = all_staffed_beddays)) +
  geom_histogram(col = "white")+
  scale_x_log10()
```


```{r}
beds %>% 
  ggplot(aes(x = total_occupied_beddays)) +
  geom_histogram(col = "white")+
  scale_x_log10()
```


```{r}
beds %>% 
  ggplot(aes(x = average_occupied_beds)) +
  geom_histogram(col = "white")+
  scale_x_log10()
```


```{r}
beds %>% 
  ggplot(aes(x = percentage_occupancy)) +
  geom_histogram(col = "white")
```


```{r}
beds_select <- beds %>% 
  mutate(date = yq(quarter),
         year = year(date),
         month = month(date, label = TRUE, abbr = FALSE),
         season = case_when(
           str_detect(month, "January") ~ "Winter",
           str_detect(month, "April") ~ "Spring",
           str_detect(month, "July") ~ "Summer",
           str_detect(month, "October") ~ "Autumn"),
         season = factor(season, order = TRUE)) %>% 
  select(quarter, hb, location, specialty_name, all_staffed_beddays, total_occupied_beddays, average_available_staffed_beds, average_occupied_beds, percentage_occupancy, date, year, month, season)
```


```{r}
beds_select %>% 
  filter(hb == "S92000003")
```


```{r}
beds_select %>%
  filter(!is.na(percentage_occupancy),
         specialty_name == "All Acute") %>% 
  group_by(quarter) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_col() 
```


```{r}
beds_select %>%
  filter(!is.na(total_occupied_beddays)) %>% 
  group_by(quarter) %>% 
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>% 
  ggplot(aes(x = quarter,
             y = mean_occupied_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
beds_select %>%
  filter(!is.na(all_staffed_beddays)) %>% 
  group_by(quarter) %>% 
  summarise(mean_staffed_beddays = mean(all_staffed_beddays)) %>% 
  ggplot(aes(x = quarter,
             y = mean_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
beds_select %>%
  filter(!is.na(average_available_staffed_beds)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_staffed_beddays = mean(average_available_staffed_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_staffed_beddays)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
beds_select %>%
  filter(!is.na(average_available_staffed_beds)) %>% 
  mutate(empty_beddays = all_staffed_beddays - total_occupied_beddays) %>% 
  group_by(quarter) %>%
  summarise(mean_empty = mean(empty_beddays)) %>% 
  ggplot(aes(x = quarter,
             y= mean_empty)) +
  geom_point() +
  geom_line(group = 1)
```



```{r}
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter, hb) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(aes(group = hb, colour = hb))
```


```{r}
beds_select %>%
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter, specialty_name, hb) %>% 
  summarise(mean_percentage_occupancy = mean(percentage_occupancy)) %>%
  filter(mean_percentage_occupancy > 50) %>% 
  ggplot(aes(x = quarter,
             y = mean_percentage_occupancy)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name)) +
  theme(legend.position = "none") +
  facet_wrap(~ hb)
```


```{r}
beds_select %>% 
  distinct(specialty_name)
```


```{r}
beds_select %>% 
  filter(specialty_name %in% c("All Acute", "Accident & Emergency", "Intensive Care Medicine")) %>% 
  group_by(quarter, specialty_name) %>% 
  summarise(mean_pct = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_pct)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name))
```


```{r}
beds_select %>% 
  filter(location == "S08000019",
         quarter == "2017Q1")
```


```{r}
beds_select %>%
  filter(!is.na(percentage_occupancy),
         percentage_occupancy == 100) %>% 
  group_by(quarter, hb) %>% 
  summarise(mean_avg_occupied_beds = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_occupied_beds)) +
  geom_point() +
  geom_line(aes(group = hb, colour = hb))
```


`
```{r}
beds_select %>%
  filter(!is.na(average_occupied_beds)) %>% 
  group_by(quarter) %>% 
  summarise(mean_average_occupied_beds = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = quarter,
             y = mean_average_occupied_beds)) +
  geom_point() +
  geom_line(group = 1)
```



```{r}
beds_select %>% 
  group_by(year) %>% 
  summarise(mean_year_bed = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = year,
             y = mean_year_bed)) +
  geom_line()
```


```{r}
beds_select %>% 
  group_by(season) %>% 
  summarise(mean_year_bed = mean(average_occupied_beds)) %>% 
  ggplot(aes(x = season,
             y = mean_year_bed)) +
  geom_col() + 
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```



```{r}
beds_select %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(season) %>% 
  summarise(mean_prct_beds = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = season,
             y = mean_prct_beds)) +
  geom_col() +
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```


```{r}
beds_select %>% 
  filter(!is.na(total_occupied_beddays)) %>% 
  group_by(season) %>% 
  summarise(mean_occupied_beddays = mean(total_occupied_beddays)) %>% 
  ggplot(aes(x = season,
             y = mean_occupied_beddays)) +
  geom_col() +
  scale_x_discrete(limits = c("Spring", "Summer", "Autumn", "Winter"))
```


```{r}
beds_select %>% 
  filter(quarter == "2016Q4")
```


```{r}
beds_select %>% 
  ggplot(aes(x = season,
             y = percentage_occupancy)) +
  geom_col() +
  facet_wrap(~ specialty_name)
```


```{r}
beds_select %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(specialty_name, month) %>% 
  summarise(mean_pct_speciality = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = month,
             y = mean_pct_speciality)) +
  geom_point() +
  geom_line(aes(group = specialty_name, colour = specialty_name))+
  theme(legend.position = "none")
```



```{r}
beds_select %>%
  filter(percentage_occupancy == 100,
         year == 2017) %>% 
  group_by(season, hb) %>% 
  summarise(count = n()) %>% 
  ggplot(aes(x = season,
             y = count)) +
  geom_col(aes(fill = hb), position = "dodge")

```


```{r}
beds_select %>% 
  mutate(bins = 
    case_when(percentage_occupancy < 25 ~ "<25",
              percentage_occupancy < 50 ~ "25-50",
              percentage_occupancy < 75 ~ "50-75",
              percentage_occupancy > 75 ~ ">75"
    )
  ) %>% 
  filter(bins == "<25") %>% 
  group_by(season) %>% 
  summarise(count = n())
```



```{r}
beds_select %>% 
  filter(specialty_name == "Accident & Emergency") %>%
  group_by(quarter) %>% 
  summarise(mean_pct = mean(percentage_occupancy)) %>% 
  ggplot(aes(x = quarter,
             y = mean_pct)) +
  geom_point() +
  geom_line(group = 1)
```



################################################################################
################################################################################
################################################################################





```{r}
simd_treatment <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv") %>% clean_names()
```


```{r}
simd_treatment <- simd_treatment %>% 
  mutate(simd = as.factor(simd))
```


```{r}
simd_treatment %>% 
  filter(hb == "S92000003")
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, admission_type) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay),
         simd == 5) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay),
         simd == 1) %>% 
  mutate(simd = replace_na(simd, 0)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```



```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_stay)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```



```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, admission_type) %>% 
  summarise(mean_avg_epidsode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_epidsode)) +
  geom_point() +
  geom_line(aes(group = admission_type, colour = admission_type))
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_avg_epidsode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_epidsode)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_episode),
         simd == 5) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
simd_treatment %>% 
  filter(!is.na(average_length_of_episode),
         simd == 1) %>% 
  group_by(quarter) %>% 
  summarise(mean_avg_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_avg_episode)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
simd_treatment %>% 
  filter(!is.na(episodes)) %>% 
  group_by(quarter, simd) %>% 
  summarise(mean_epidsode = mean(episodes)) %>% 
  ggplot(aes(x = quarter,
             y = mean_epidsode)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```


```{r}
simd_treatment %>% 
  filter(is.na(simd))
```


```{r}
simd_treatment %>% 
  filter(!is.na(stays)) %>% 
  group_by(quarter, simd) %>% 
  summarise(total_stays = sum(stays)) %>% 
  ggplot(aes(x = quarter,
             y = total_stays)) +
  geom_point() +
  geom_line(aes(group = simd, colour = simd))
```



################################################################################
################################################################################
################################################################################


```{r}
age_and_sex <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv") %>%  clean_names()
```


```{r}
age_and_sex <- age_and_sex %>% 
  mutate(sex = as.factor(sex))
```


```{r}
age_and_sex %>% 
  filter(sex == "Female",
         age == "0-9 years")
```


```{r}
age_and_sex %>%
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter) %>% 
  summarise(avg_length_of_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_stay)) +
  geom_point() +
  geom_line(group = 1)
```


```{r}
age_and_sex %>% 
  group_by(quarter, age) %>% 
  summarise(length_of_stay = mean(length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = length_of_stay)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
```


```{r}
age_and_sex %>%
  filter(!is.na(average_length_of_stay)) %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_stay = mean(average_length_of_stay)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_stay)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
```


```{r}
age_and_sex %>%
  filter(!is.na(average_length_of_episode),
         age == "10-19 years",
         sex == "Female") %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
```


```{r}
age_and_sex %>%
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, age) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = age,
                colour = age))
```



```{r}
age_and_sex %>%
  filter(!is.na(average_length_of_episode)) %>% 
  group_by(quarter, sex) %>% 
  summarise(avg_length_of_episode = mean(average_length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = avg_length_of_episode)) +
  geom_point() +
  geom_line(aes(group = sex,
                colour = sex))
```



################################################################################
################################################################################
################################################################################



```{r}
speciality <- read_csv("raw_data/non_covid_raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_specialty.csv") %>% clean_names()
```


```{r}
speciality %>% 
  filter(hb == "S92000003")
```


```{r}
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_episode = mean(length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_episode)) +
  geom_point() +
  geom_line(group = 1) 
```


```{r}
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_episode = mean(length_of_episode)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_episode)) +
  geom_point() +
  geom_line(group = 1) 
```


```{r}
speciality %>% 
  group_by(quarter) %>% 
  summarise(mean_length_of_spell = mean(length_of_spell)) %>% 
  ggplot(aes(x = quarter,
             y = mean_length_of_spell)) +
  geom_point() +
  geom_line(group = 1)
```




################################################################################
################################################################################
################################################################################


```{r}
waiting_times <- read_csv("raw_data/non_covid_raw_data/monthly_ae_waitingtimes_202206.csv") %>% clean_names()
```


```{r}
waiting_times <- waiting_times %>% 
  mutate(date = ym(month),
         quarter = quarter(date))
```


```{r}
waiting_times %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()
```


```{r}
waiting_times %>% 
  filter(department_type == "Emergency Department") %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()
```


```{r}
waiting_times %>% 
  filter(department_type == "Minor Injury Unit or Other") %>% 
  mutate(pct = number_meeting_target_aggregate / number_of_attendances_aggregate * 100) %>%
  group_by(date) %>% 
  summarise(mean_pct = mean(pct)) %>% 
  ggplot(aes(x = date,
             y = mean_pct)) +
  geom_line() +
  geom_smooth()
```



```{r}
waiting_times <- waiting_times %>% 
  mutate(
    prop_admission_to_same = (discharge_destination_admission_to_same/number_of_attendances_aggregate)
  ) %>% 
  mutate(prop_other_speciality = (discharge_destination_other_specialty/number_of_attendances_aggregate)) %>% 
  mutate(prop_residence = (discharge_destination_residence/number_of_attendances_aggregate)) %>% 
  mutate(prop_transfer = (discharge_destination_transfer/number_of_attendances_aggregate)) %>% 
  mutate(prop_unknown = (discharge_destination_unknown/number_of_attendances_aggregate)) 
```

```{r}
waiting_times %>% 
  filter(!is.na(prop_admission_to_same)) %>% 
  group_by(date) %>% 
  summarise(mean_admission_to_same = mean(prop_admission_to_same)) %>% 
  ggplot(aes(x = date,
         y = mean_admission_to_same)) +
  geom_line() +
  scale_x_date(date_breaks = "6 months", date_labels =  "%b %Y") +
  geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4)

waiting_times %>% 
  filter(!is.na(prop_other_speciality)) %>% 
  group_by(date) %>% 
  summarise(mean_other_speciality = mean(prop_other_speciality)) %>% 
  ggplot(aes(x = date,
         y = mean_other_speciality)) +
  geom_line() +
  geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4)

waiting_times %>% 
  filter(!is.na(prop_residence)) %>% 
  group_by(date) %>% 
  summarise(mean_residence = mean(prop_residence)) %>% 
  ggplot(aes(x = date,
         y = mean_residence)) +
  geom_line()+
  scale_x_date(date_breaks = "6 months", date_labels =  "%b %Y") +
  geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4)

waiting_times %>% 
  filter(!is.na(prop_transfer)) %>% 
  group_by(date) %>% 
  summarise(mean_transfer = mean(prop_transfer)) %>% 
  ggplot(aes(x = date,
         y = mean_transfer)) +
  geom_line() +
  scale_x_date(date_breaks = "6 months", date_labels =  "%b %Y") +
  geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4)

waiting_times %>% 
  filter(!is.na(prop_unknown)) %>% 
  group_by(date) %>% 
  summarise(mean_unknown = mean(prop_unknown)) %>% 
  ggplot(aes(x = date,
         y = mean_unknown)) +
  geom_line() +
  scale_x_date(date_breaks = "6 months", date_labels =  "%b %Y") +
  geom_vline(xintercept = as.numeric(as.Date("2008-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2009-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2010-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2011-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2012-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2013-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2014-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2015-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2016-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2017-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2018-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2019-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2020-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2021-01-01")), linetype=4)+
  geom_vline(xintercept = as.numeric(as.Date("2022-01-01")), linetype=4)
```



################################################################################
################################################################################
################################################################################



```{r}
library(fable)
```


```{r}
beds_fore <- beds_select %>%
  mutate(quarter = yearquarter(date)) %>% 
  filter(!is.na(percentage_occupancy)) %>% 
  group_by(quarter) %>% 
  summarise(mean_pct = mean(percentage_occupancy))


beds_fore <- as_tsibble(beds_fore, index = quarter, validate = FALSE) %>% 
  select(quarter, mean_pct)

beds_fore <- beds_fore %>% 
  filter_index(~ "2019 Q4")

beds_fore %>% 
  autoplot(mean_pct)
```


```{r}
fit <- beds_fore %>% 
  model(
    snaive = SNAIVE(mean_pct),
    mean_model = MEAN(mean_pct),
    arima = ARIMA(mean_pct)
  )
```


```{r}
forecast_1 <- fit %>% 
  fabletools::forecast(h=12)

forecast_1
```


```{r}
forecast_1 %>% 
  autoplot(beds_fore) +
  guides(colour = guide_legend(title = "Forecast"))
```




```{r}
wait_fore <- waiting_times %>% 
  mutate(month = ym(month),
         month = yearmonth(month)) %>% 
  filter(!is.na(prop_residence)) %>% 
  group_by(month) %>% 
  summarise(mean_prop_residence = mean(prop_residence))

wait_fore <- as_tsibble(wait_fore, index = month, validate = FALSE)

wait_fore <- wait_fore %>% 
  filter_index(~ "2019-12-01")

wait_fore %>% 
  autoplot(mean_prop_residence)
```


```{r}
fit_wait <- wait_fore %>% 
  model(
    snaive = SNAIVE(mean_prop_residence),
    mean_model = MEAN(mean_prop_residence),
    arima = ARIMA(mean_prop_residence)
  )
```


```{r}
forecast_wait <- fit_wait %>% 
  fabletools::forecast(h=30)

forecast_wait
```



```{r}
forecast_wait %>% 
  autoplot(wait_fore) +
  guides(colour = guide_legend(title = "Forecast"))
```





